Preparing the data
library(readxl)
library(openxlsx)
stuns <- read_excel("data/2015 Reported Taser Data.xlsx", sheet=1)
stuns[1,] <- ifelse(is.na(stuns[1,]), colnames(stuns), stuns[1,])
colnames(stuns) <- stuns[1,]
stuns <- stuns[-1,]
colnames(stuns) <- make.names(colnames(stuns))
# another stream but for python data analysis going on right now:
# https://www.livecoding.tv/jakekara/
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Cleaning up the data
colnames(stuns) <- c("Law.Enforcement.Agency",
"Number.of.Incident.Reports",
"Incident.Case.Number",
"Date.of.Incident",
"Time.of.Incident",
"Sex",
"Race",
"Hispanic",
"Height",
"Weight",
"Age",
"Deployment.Type.I",
"Deployment.Type.II",
"Displays.or.Arc",
"Drive.Stun.Application",
"Activation.After.Probe.Contact",
"Number.of.Deployments",
"Warning.to.Subject",
"Subject.Injured",
"Subject.Not.Injured",
"Subject.Bruises",
"Subject.Abrasions",
"Subject.Breathing.Difficulty",
"Subject.Probe.Puncture.Only",
"Subject.Lost.Consciousness",
"Subject.Death",
"Subject.Other",
"Officer.Injured",
"Officer.Not.Injured",
"Officer.Bruises",
"Officer.Abrasions",
"Officer.Breathing.Difficulty",
"Officer.Probe.Puncture.Only",
"Officer.Lost.Consciousness",
"Officer.Death",
"Officer.Other",
"Location.Environment",
"Officer.s.Arrival",
"If.Other..Explain",
"Crime.in.Progress",
"Domestic.Disturbance",
"Disturbance..Other.",
"Traffic.Stop",
"Emotionally.Disturbed.Person",
"Suspicious.Person",
"Executing.Warrant",
"Under.Influence",
"Activity.Other",
"Non.aggressive",
"Previous.Hostility",
"Possibly.Intoxicated",
"Emotionally.Disturbed",
"Aggressive..Verbal.",
"Aggressive..Physical.",
"Armed.with",
"Officer.Perception.Other",
"Threat..Hostile",
"Dead.Weight..Non.Compliant",
"Fighting.Stance..Combative",
"Threaten.Use.of.Weapon",
"Fleeing",
"Unarmed.Assault",
"Armed.with.Firearm",
"Armed.with.Edged.Weapon",
"Armed.with.Blunt.Instrument",
"Armed.with.Other",
"Failed.to.Follow.Directions",
"Suicidal",
"Resistance.Other")
stuns$race_ethnicity <- ifelse(stuns$Hispanic==1, "Hispanic", stuns$Race)
Stun incidents by race in the state
by_state <- stuns %>%
group_by(race_ethnicity) %>%
summarise(total=n()) %>%
mutate(percent=round(total/sum(total)*100,2))
library(knitr)
kable(by_state)
| Asian |
2 |
0.33 |
| Black |
187 |
30.66 |
| Hispanic |
130 |
21.31 |
| Unknown |
1 |
0.16 |
| White |
290 |
47.54 |
Total stun incidents per department
library(tidyr)
library(DT)
by_dept_total <- stuns %>%
group_by(Law.Enforcement.Agency, race_ethnicity) %>%
summarise(total=n()) %>%
spread(race_ethnicity, total)
datatable(by_dept_total)
Stun incidents per department by race
by_dept_percent <- stuns %>%
group_by(Law.Enforcement.Agency, race_ethnicity) %>%
summarise(total=n()) %>%
mutate(percent=round(total/sum(total)*100,2)) %>%
select(Law.Enforcement.Agency, race_ethnicity, percent) %>%
spread(race_ethnicity, percent)
datatable(by_dept_percent)
Time of stun incidents
stuns$Time.of.Incident <- convertToDateTime(as.numeric(stuns$Time.of.Incident), origin = "2016-07-04")
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
stuns$hour <- hour(stuns$Time.of.Incident)
library(ggplot2)
library(extrafont)
## Registering fonts with R
library(ggalt)
ggplot(stuns, aes(hour)) + geom_histogram(binwidth=1)
## Warning: Removed 2 rows containing non-finite values (stat_bin).

# Time of stun incidents by race
ggplot(stuns, aes(hour, fill=race_ethnicity)) + geom_histogram(binwidth=1)
## Warning: Removed 2 rows containing non-finite values (stat_bin).

Month
stuns$Date.of.Incident <- as.POSIXct(as.numeric(stuns$Date.of.Incident) * (60*60*24)
, origin="1899-12-30"
, tz="GMT")
stuns$month <- month(stuns$Date.of.Incident, label=TRUE)
ggplot(stuns, aes(month)) + geom_bar()

ggplot(stuns, aes(month)) + geom_bar() + facet_grid(race_ethnicity ~.)

Day of the week
stuns$day <- wday(stuns$Date.of.Incident, label=TRUE)
ggplot(stuns, aes(day)) + geom_bar()

ggplot(stuns, aes(day)) + geom_bar() + facet_grid(race_ethnicity ~.)

Mapping total stun gun incidents
## Mapping
require(scales)
## Loading required package: scales
require(dplyr)
require(gtools)
## Loading required package: gtools
require(ggplot2)
require(rgdal)
## Loading required package: rgdal
## Warning: package 'rgdal' was built under R version 3.2.5
## Loading required package: sp
## rgdal: version: 1.1-10, (SVN revision 622)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 1.11.4, released 2016/01/25
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.2/Resources/library/rgdal/gdal
## Loaded PROJ.4 runtime: Rel. 4.9.1, 04 March 2015, [PJ_VERSION: 491]
## Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.2/Resources/library/rgdal/proj
## Linking to sp version: 1.2-3
require(ggmap)
## Loading required package: ggmap
require(Cairo)
## Loading required package: Cairo
require(gpclib)
## Loading required package: gpclib
## General Polygon Clipper Library for R (version 1.5-5)
## Type 'class ? gpc.poly' for help
require(maptools)
## Loading required package: maptools
## Checking rgeos availability: TRUE
require(reshape)
## Loading required package: reshape
##
## Attaching package: 'reshape'
## The following object is masked from 'package:lubridate':
##
## stamp
## The following object is masked from 'package:tidyr':
##
## expand
## The following object is masked from 'package:dplyr':
##
## rename
library(devtools)
## Warning: package 'devtools' was built under R version 3.2.5
library(stringr)
library(raster)
##
## Attaching package: 'raster'
## The following object is masked from 'package:tidyr':
##
## extract
## The following object is masked from 'package:dplyr':
##
## select
library(sp)
by_dept_total[is.na(by_dept_total)] <-0
by_dept_total$total <- by_dept_total$Asian + by_dept_total$Black + by_dept_total$Hispanic + by_dept_total$Unknown + by_dept_total$White
gpclibPermit()
## Warning in gpclibPermit(): support for gpclib will be withdrawn from
## maptools at the next major release
## [1] TRUE
gpclibPermitStatus()
## [1] TRUE
townborders <- readOGR(dsn="maps", layer="ctgeo")
## OGR data source with driver: ESRI Shapefile
## Source: "maps", layer: "ctgeo"
## with 169 features
## It has 6 fields
townborders_only <- townborders
townborders<- fortify(townborders, region="NAME10")
names(by_dept_total)[names(by_dept_total) == 'Law.Enforcement.Agency'] <- 'id'
total_map <- left_join(townborders, by_dept_total)
## Joining by: "id"
tm_ct <- ggplot() +
geom_polygon(data = total_map, aes(x=long, y=lat, group=group, fill=total), color = "black", size=0.2) +
geom_polygon(data = total_map, aes(x=long, y=lat, group=group, fill=total), color = "black", size=0.2) +
coord_map() +
scale_fill_distiller(type="seq", trans="reverse", palette = "Reds", breaks=pretty_breaks(n=10)) +
theme_nothing(legend=TRUE) +
labs(title="Where people are tased (total)", fill="")
print(tm_ct)

Mapping percent by race
# White
names(by_dept_percent)[names(by_dept_percent) == 'Law.Enforcement.Agency'] <- 'id'
percent_map <- left_join(townborders, by_dept_percent)
## Joining by: "id"
tm_ct <- ggplot() +
geom_polygon(data = percent_map, aes(x=long, y=lat, group=group, fill=White), color = "black", size=0.2) +
geom_polygon(data = percent_map, aes(x=long, y=lat, group=group, fill=White), color = "black", size=0.2) +
coord_map() +
scale_fill_distiller(type="seq", trans="reverse", palette = "Reds", breaks=pretty_breaks(n=10)) +
theme_nothing(legend=TRUE) +
labs(title="Percent of white suspects tased", fill="")
print(tm_ct)

# Black
tm_ct <- ggplot() +
geom_polygon(data = percent_map, aes(x=long, y=lat, group=group, fill=Black), color = "black", size=0.2) +
geom_polygon(data = percent_map, aes(x=long, y=lat, group=group, fill=Black), color = "black", size=0.2) +
coord_map() +
scale_fill_distiller(type="seq", trans="reverse", palette = "Reds", breaks=pretty_breaks(n=10)) +
theme_nothing(legend=TRUE) +
labs(title="Percent of Black suspects tased", fill="")
print(tm_ct)

# Hispanic
tm_ct <- ggplot() +
geom_polygon(data = percent_map, aes(x=long, y=lat, group=group, fill=Hispanic), color = "black", size=0.2) +
geom_polygon(data = percent_map, aes(x=long, y=lat, group=group, fill=Hispanic), color = "black", size=0.2) +
coord_map() +
scale_fill_distiller(type="seq", trans="reverse", palette = "Reds", breaks=pretty_breaks(n=10)) +
theme_nothing(legend=TRUE) +
labs(title="Percent of Hispanic suspects tased", fill="")
print(tm_ct)

Gender breakdown
gender <- stuns %>%
group_by(Sex) %>%
summarise(Count=n()) %>%
mutate(Percent=round(Count/sum(Count)*100,2))
kable(gender)
| Female |
37 |
6.07 |
| Male |
573 |
93.93 |
Age distribution
stuns$Age <- as.numeric(stuns$Age)
# Age distribution
ggplot(stuns, aes(Age)) + geom_histogram(binwidth=1)
## Warning: Removed 6 rows containing non-finite values (stat_bin).

ggplot(stuns, aes(Age)) + geom_histogram(binwidth=10)
## Warning: Removed 6 rows containing non-finite values (stat_bin).

stuns3 <- subset(stuns, Age < 100)
stuns3 <- subset(stuns3, race_ethnicity!="Asian")
stuns3 <- subset(stuns3, race_ethnicity!="Unknown")
gg <- ggplot(stuns3, aes(Age)) + geom_histogram(fill="#bf6151", binwidth=2) + facet_grid(race_ethnicity ~ .)
gg <- gg + labs(x="Age", y="Stuns deployed", title="Distribution of stuns by age",
subtitle="",
caption="SOURCE: CCSU Institute for Municipal and Regional Policy Management \nAndrew Ba Tran/TrendCT.org")
gg <- gg + theme_bw(base_family="Calibri")
gg <- gg + theme(text = element_text(size=20))
#gg <- gg + theme(panel.grid.major=element_blank())
#gg <- gg + theme(panel.grid.minor=element_blank())
gg <- gg + theme(panel.border=element_blank())
gg <- gg + theme(text = element_text(size=20))
#gg <- gg + theme(axis.ticks=element_blank())
gg <- gg + theme(legend.position = "none")
#gg <- gg + theme(axis.text.x=element_blank())
gg <- gg + theme(plot.title=element_text(face="bold", family="Lato Black", size=22))
gg <- gg + theme(plot.subtitle=element_text(face="italic", size=15, margin=margin(b=12)))
gg <- gg + theme(plot.caption=element_text(size=12, margin=margin(t=12), color="#7a7d7e"))
gg

Where does this happen?
locations <- stuns %>%
group_by(Location.Environment) %>%
summarise(count=n()) %>%
mutate(percent=round(count/sum(count)*100,2))
kable(locations)
| Commercial Establishment |
20 |
3.28 |
| Educational Facility |
7 |
1.15 |
| Indoors-Private Property |
16 |
2.62 |
| Indoors-Public Building |
44 |
7.21 |
| Other Residence |
31 |
5.08 |
| Outdoors-Private Property |
76 |
12.46 |
| Outdoors-Public Area |
241 |
39.51 |
| Subject’s Residence |
172 |
28.20 |
| NA |
3 |
0.49 |
gg <- ggplot(stuns3, aes(Location.Environment)) + facet_grid(race_ethnicity ~.) + geom_bar(fill="#bf6151") + coord_flip()
gg <- gg + labs(x=NULL, y="Stuns deployed", title="Distribution of stuns by location",
subtitle="",
caption="SOURCE: CCSU Institute for Municipal and Regional Policy Management \nAndrew Ba Tran/TrendCT.org")
gg <- gg + theme_bw(base_family="Calibri")
gg <- gg + theme(text = element_text(size=20))
#gg <- gg + theme(panel.grid.major=element_blank())
#gg <- gg + theme(panel.grid.minor=element_blank())
gg <- gg + theme(panel.border=element_blank())
gg <- gg + theme(text = element_text(size=20))
#gg <- gg + theme(axis.ticks=element_blank())
gg <- gg + theme(legend.position = "none")
#gg <- gg + theme(axis.text.x=element_blank())
gg <- gg + theme(plot.title=element_text(face="bold", family="Lato Black", size=22, hjust=-1.1))
gg <- gg + theme(plot.subtitle=element_text(face="italic", size=15, margin=margin(b=12)))
gg <- gg + theme(plot.caption=element_text(size=12, margin=margin(t=12), color="#7a7d7e"))
gg
## Warning: Removed 3 rows containing non-finite values (stat_count).

What is the subject holding
stuns$Armed.with <- str_to_title(stuns$Armed.with)
weapon <- stuns %>%
group_by(Armed.with) %>%
summarise(count=n()) %>%
arrange(-count)
weapon <- subset(weapon, !is.na(Armed.with))
kable(weapon)
| Knife |
26 |
| Baseball Bat |
3 |
| Broken Glass |
3 |
| Firearm |
3 |
| Hammer |
3 |
| Vehicle |
3 |
| Box Cutter |
2 |
| Possible Knife |
2 |
| 1 |
1 |
| 12" Kitchen Knife |
1 |
| 15" Wooden Stake |
1 |
| 2 Knives Within Wingspan |
1 |
| Arrow |
1 |
| Butcher Knife |
1 |
| Chair |
1 |
| Facsimile Firearm |
1 |
| Glass Bottle, Possibly Sledgehammer |
1 |
| Handgun |
1 |
| Handgun Pointed (Later Determined Replica) |
1 |
| Heavy Metal Stanchion |
1 |
| Knife |
1 |
| Knife And Dagger |
1 |
| Knife Sharpening Steel 14" Long |
1 |
| Knife To His Neck |
1 |
| Knife, Possibly A Machete |
1 |
| Large Butcher Knife |
1 |
| Large K-Bar Knife |
1 |
| Large Kitchen Knife |
1 |
| Large Stick |
1 |
| Metal Cane |
1 |
| Metal Pipe |
1 |
| Object With Pointed Tip In Hand |
1 |
| Pistol |
1 |
| Pitbull |
1 |
| Pocket Knife |
1 |
| Possible Firearm |
1 |
| Possible Gun |
1 |
| Possible Weapon-History Of Weapons Charges |
1 |
| Possibly Long Rifle |
1 |
| Rifle |
1 |
| Sword |
1 |
| Threatened Use Of Knife To Kill |
1 |
| Unknown If Armed |
1 |
| Unknown Object In Hand |
1 |
| Utility Knife |
1 |
| Winged Cork Screw |
1 |
| Wooden Closet Rod |
1 |
How often tased
stuns$Number.of.Deployments <- as.numeric(stuns$Number.of.Deployments)
## Warning: NAs introduced by coercion
deployed <- stuns %>%
group_by(race_ethnicity) %>%
summarise(mean(Number.of.Deployments, na.rm=T))
kable(deployed)
| Asian |
1.0000000 |
| Black |
1.1314286 |
| Hispanic |
1.1869919 |
| Unknown |
1.0000000 |
| White |
0.9037037 |
Doesn’t say much. Let’s try another factor
stuns$deployment <- ifelse(stuns$Number.of.Deployments>1, "multiple", "single")
stuns$deployment <- ifelse(stuns$Number.of.Deployments==0, "none", stuns$deployment)
deployed <- stuns %>%
group_by(race_ethnicity, deployment) %>%
summarise(count=n()) %>%
mutate(percent=round(count/sum(count)*100,2))
deployed <- deployed[c("race_ethnicity", "deployment", "percent")]
deployed <- subset(deployed, !is.na(deployment))
deployed <- deployed %>%
spread(deployment, percent)
kable(deployed)
| Asian |
NA |
NA |
100.00 |
| Black |
19.79 |
19.79 |
54.01 |
| Hispanic |
27.69 |
33.08 |
33.85 |
| Unknown |
NA |
NA |
100.00 |
| White |
17.93 |
40.00 |
35.17 |
Weights
stuns$Weight <- as.numeric(stuns$Weight)
stuns2 <- subset(stuns, !is.na(Number.of.Deployments))
gg <- ggplot(stuns2, aes(factor(Number.of.Deployments), Weight))
gg <- gg + geom_boxplot((aes(fill=factor(Number.of.Deployments))))
gg <- gg + labs(x="Stuns deployed", y="Pounds", title="Distribution of number of stuns by suspect's weight",
subtitle="The median number of times a person is stunned by police tends to trend with heaviness of suspect.\nNote: Sample size for 5-10 deployments is between 1 and 4 each— too small to draw conclusions.",
caption="SOURCE: CCSU Institute for Municipal and Regional Policy Management \nAndrew Ba Tran/TrendCT.org")
gg <- gg + theme_bw(base_family="Calibri")
gg <- gg + theme(text = element_text(size=20))
#gg <- gg + theme(panel.grid.major=element_blank())
#gg <- gg + theme(panel.grid.minor=element_blank())
gg <- gg + theme(panel.border=element_blank())
gg <- gg + theme(text = element_text(size=20))
gg <- gg + theme(axis.ticks=element_blank())
gg <- gg + theme(legend.position = "none")
#gg <- gg + theme(axis.text.x=element_blank())
gg <- gg + theme(plot.title=element_text(face="bold", family="Lato Black", size=22))
gg <- gg + theme(plot.subtitle=element_text(face="italic", size=15, margin=margin(b=12)))
gg <- gg + theme(plot.caption=element_text(size=12, margin=margin(t=12), color="#7a7d7e"))
gg

b2<-ggplot(stuns, aes(factor(Number.of.Deployments), Weight)) +
geom_jitter(alpha=I(1/4), aes(color=Number.of.Deployments)) +
theme(legend.position = "none")
Heights
stuns$Height <- ifelse(stuns$Height=="6'", "6'0\"", stuns$Height)
stuns$Height <- ifelse(stuns$Height=="5'", "5'0\"", stuns$Height)
stuns$Height.inches <- sapply(strsplit(as.character(stuns$Height),"'|\""),
function(x){12*as.numeric(x[1]) + as.numeric(x[2])})
stuns2 <- subset(stuns, !is.na(Number.of.Deployments))
gg <- ggplot(stuns2, aes(factor(Number.of.Deployments), Height.inches))
gg <- gg + geom_boxplot((aes(fill=factor(Number.of.Deployments))))
gg <- gg + labs(x="Stuns deployed", y="Inches", title="Distribution of number of stuns by suspect's height",
subtitle="The median number of times a person is stunned by police tends to trend with heaviness of suspect.\nNote: Sample size for 5-10 deployments is between 1 and 4 each— too small to draw conclusions.",
caption="SOURCE: CCSU Institute for Municipal and Regional Policy Management \nAndrew Ba Tran/TrendCT.org")
gg <- gg + theme_bw(base_family="Calibri")
gg <- gg + theme(text = element_text(size=20))
#gg <- gg + theme(panel.grid.major=element_blank())
#gg <- gg + theme(panel.grid.minor=element_blank())
gg <- gg + theme(panel.border=element_blank())
gg <- gg + theme(text = element_text(size=20))
gg <- gg + theme(axis.ticks=element_blank())
gg <- gg + theme(legend.position = "none")
#gg <- gg + theme(axis.text.x=element_blank())
gg <- gg + theme(plot.title=element_text(face="bold", family="Lato Black", size=22))
gg <- gg + theme(plot.subtitle=element_text(face="italic", size=15, margin=margin(b=12)))
gg <- gg + theme(plot.caption=element_text(size=12, margin=margin(t=12), color="#7a7d7e"))
gg
